# Problem : Can these models be used to predict how lawmakers may vote?

#Prepare a clean R environment in work space.
rm(list=ls())

#Use setwd() to navigate to the data directory. When the script is run from the
#RStudio editor we switch to the folder that contains this file so the relative
#CSV paths below resolve. Outside RStudio (e.g. Rscript) the editor context is
#not available, so the current working directory is used unchanged instead of
#failing.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  ScriptPath <- rstudioapi::getSourceEditorContext()$path
  #An unsaved document has an empty path; skip setwd() in that case.
  if (!is.null(ScriptPath) && nzchar(ScriptPath)) {
    setwd(dirname(ScriptPath))
  }
}

#Import our csv file data. Strings are kept as character (not factor) so that
#the comparisons against "y" / party names below behave the same on every R
#version.
data <- read.csv("VotingData.csv", header = TRUE, stringsAsFactors = FALSE) #Load data

#---- Train/test split ----
TrainingPct <- 0.6 #Fraction of data to train model on
NumObs <- nrow(data)
TrainingSample <- floor(TrainingPct * NumObs)  #Number of observations to train the model on
TestSample <- NumObs - TrainingSample          #Number of observations to test the model on

#The first TrainingSample rows are the training data (no shuffling is done).
#seq_len() is used so that a zero-sized sample yields zero rows.
TrainingData <- data[seq_len(TrainingSample), ]

#Columns 3 onward are treated as the vote columns.
stopifnot(ncol(data) >= 3)
VoteCols <- 3:ncol(data)

m <- 2    #Equivalent sample size for Laplacian correction
p <- 1/2  #Prior probability for Laplacian correction

#---- Conditional probabilities of a "y" vote, per party ----
#Find probabilities associated with democrat voting
DemData <- subset(TrainingData, TrainingData$Party == "democrat")

#Find probabilities associated with republican voting
GOPData <- subset(TrainingData, TrainingData$Party == "republican")

#Laplace-corrected P(vote == "y" | party) for every vote column:
#(count of "y" + m*p) / (party size + m)
SmoothedYesProb <- function(PartyData) {
  vapply(
    PartyData[VoteCols],
    function(Votes) (sum(Votes == "y") + m * p) / (nrow(PartyData) + m),
    numeric(1)
  )
}

#Store all probabilities in a matrix: row 1 = democrat, row 2 = republican,
#one column per vote plus a final column holding the marginal (prior)
#probability of each party in the training data.
ProbMat <- rbind(
  c(SmoothedYesProb(DemData),
    sum(TrainingData$Party == "democrat") / nrow(TrainingData)),
  c(SmoothedYesProb(GOPData),
    sum(TrainingData$Party == "republican") / nrow(TrainingData))
)
FinalInd <- ncol(ProbMat)  #Index of the marginal-probability column

#Row labels are written out explicitly: rows are filled in a fixed
#democrat/republican order, whereas unique(TrainingData$Party) follows the
#order of first appearance in the data and could swap the labels.
dimnames(ProbMat) <- list(
  c("democrat", "republican"),
  c(names(data)[VoteCols], "MargProb")
)

#---- Test set ----
#Remaining rows after the training rows; empty when TestSample is 0.
TestData <- data[TrainingSample + seq_len(TestSample), ]
#One row per test member: P(democrat), P(republican), assigned party.
AssignedMat <- matrix(0, nrow(TestData), 3)


#Use the NB classifier on test Data
#
#Classifies one member's voting record with the Naive Bayes probabilities.
#
#Arguments:
#  TestVec     - the member of interest's vote record: a character vector or a
#                one-row data frame with one entry per vote column of ProbMat
#                (same order). An entry equal to "y" counts as a yes vote; any
#                other value is scored with 1 - P(yes).
#  ProbMat     - matrix with one row per party. Columns 1..(ncol-1) hold
#                P(vote == "y" | party); the last column holds the marginal
#                probability of the party.
#  PartyLabels - label for each row of ProbMat, in row order. The default
#                matches the democrat/republican row order used when ProbMat
#                is built in this script.
#
#Returns a list with:
#  AssignedVec   - numeric vector of the (unnormalised) joint probability of
#                  the record under each party, in ProbMat row order.
#  AssignedParty - label of the party with the highest probability.
VotingModel_fn<-function(TestVec,ProbMat,PartyLabels=c("democrat","republican")){

  if (is.data.frame(TestVec) && nrow(TestVec) != 1) {
    stop("TestVec must contain exactly one voting record", call. = FALSE)
  }

  #Flatten the record to a plain character vector (works for both a
  #one-row data frame and an atomic vector, including factors).
  Votes <- vapply(TestVec, function(v) as.character(v), character(1),
                  USE.NAMES = FALSE)
  NumVotes <- length(Votes)

  #A record of the wrong length would otherwise silently line up votes with
  #the wrong columns (or with the marginal-probability column).
  if (ncol(ProbMat) != NumVotes + 1) {
    stop("TestVec has ", NumVotes, " votes but ProbMat describes ",
         ncol(ProbMat) - 1, call. = FALSE)
  }
  if (nrow(ProbMat) != length(PartyLabels)) {
    stop("PartyLabels must have one label per row of ProbMat", call. = FALSE)
  }
  if (anyNA(Votes)) {
    stop("TestVec contains missing votes", call. = FALSE)
  }

  #Likelihood of each observed vote under each party: P(yes) where the member
  #voted "y", 1 - P(yes) otherwise.
  Likelihood <- ProbMat[, seq_len(NumVotes), drop = FALSE]
  NotYes <- which(Votes != "y")
  if (length(NotYes) > 0) {
    Likelihood[, NotYes] <- 1 - Likelihood[, NotYes]
  }

  #Multiply the per-vote likelihoods with the party's marginal probability.
  Probs <- unname(apply(cbind(Likelihood, ProbMat[, NumVotes + 1]), 1, prod))
  ind <- which.max(Probs)  #Find which probability is higher

  #Elements returned as a list. The label comes from PartyLabels (fixed row
  #order) rather than from any global training data.
  list(AssignedVec = Probs, AssignedParty = PartyLabels[ind])
}


#Classify every member of the test set. Each record is classified exactly
#once; seq_len() keeps the loop from running when the test set is empty.
for (i in seq_len(nrow(TestData))) {
  TestVec <- TestData[i, 3:ncol(TestData)]  #Vote columns of member i
  result <- VotingModel_fn(TestVec, ProbMat)
  #Row i: probability under each party, then the assigned party label.
  #Mixing numbers and a label coerces AssignedMat to a character matrix.
  AssignedMat[i, ] <- c(as.numeric(result$AssignedVec), result$AssignedParty)
}

#Compare the actual party of each test member with the assigned one.
CheckMat <- data.frame(
  Actual = as.character(TestData$Party),
  Assigned = as.character(AssignedMat[, 3]),
  stringsAsFactors = FALSE
)
#Share of test members classified correctly (a fraction between 0 and 1).
Pct_Accuracy <- sum(CheckMat$Actual == CheckMat$Assigned) / nrow(TestData)


print("Classifier Percent Accuracy") #Print our accuracy value.
print(Pct_Accuracy)

#Executing function in a sample data set to predict likelihood of voting.
#The file is read and classified once; it is expected to hold a single
#voting record with one column per vote column of ProbMat.
Example <- read.csv("ArbitraryMember.csv", stringsAsFactors = FALSE)  #load data

result <- VotingModel_fn(Example, ProbMat)
print(result)

